import pandas as pd
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV,KFold, cross_validate
from sklearn.linear_model import ElasticNetCV
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cufflinks as cf
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Set up offline plotting: initialise plotly's notebook mode, then switch
# cufflinks to offline mode.
init_notebook_mode(connected=True)
cf.go_offline()
from sklearn.exceptions import ConvergenceWarning
import warnings

# Silence scikit-learn convergence warnings. Merely instantiating
# ConvergenceWarning has no effect; a filter has to be registered with the
# warnings module for the category to be suppressed.
warnings.filterwarnings("ignore", category=ConvergenceWarning)
# NOTE(review): this blanket filter hides *every* warning for the rest of the
# session (and makes the targeted filter above redundant). Kept to preserve
# the notebook's existing behaviour; consider removing it so genuine problems
# are not masked.
warnings.filterwarnings("ignore")
# Load the two superconductor CSV files from the local case-study folder.
df_train = pd.read_csv(r'/Users/allen/Desktop/MSDS/QTW/qtw_smu/Case Study 1: Linear Regression/superconduct/train.csv')
df_unique_m = pd.read_csv(r'/Users/allen/Desktop/MSDS/QTW/qtw_smu/Case Study 1: Linear Regression/superconduct/unique_m.csv')

# Combine the frames row-by-row on their indexes. Both frames carry a
# `critical_temp` column, so the merge suffixes them `_x` / `_y`: the right-hand
# copy and the `material` column are discarded, and the left-hand copy gets its
# original name back.
df_merge = (
    df_train
    .merge(df_unique_m, left_index=True, right_index=True)
    .drop(columns=['critical_temp_y', 'material'])
    .rename(columns={'critical_temp_x': 'critical_temp'})
)
df_merge.head()
| number_of_elements | mean_atomic_mass | wtd_mean_atomic_mass | gmean_atomic_mass | wtd_gmean_atomic_mass | entropy_atomic_mass | wtd_entropy_atomic_mass | range_atomic_mass | wtd_range_atomic_mass | std_atomic_mass | ... | Ir | Pt | Au | Hg | Tl | Pb | Bi | Po | At | Rn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4 | 88.944468 | 57.862692 | 66.361592 | 36.116612 | 1.181795 | 1.062396 | 122.90607 | 31.794921 | 51.968828 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 |
| 1 | 5 | 92.729214 | 58.518416 | 73.132787 | 36.396602 | 1.449309 | 1.057755 | 122.90607 | 36.161939 | 47.094633 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 |
| 2 | 4 | 88.944468 | 57.885242 | 66.361592 | 36.122509 | 1.181795 | 0.975980 | 122.90607 | 35.741099 | 51.968828 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 |
| 3 | 4 | 88.944468 | 57.873967 | 66.361592 | 36.119560 | 1.181795 | 1.022291 | 122.90607 | 33.768010 | 51.968828 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 |
| 4 | 4 | 88.944468 | 57.840143 | 66.361592 | 36.110716 | 1.181795 | 1.129224 | 122.90607 | 27.848743 | 51.968828 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 |
5 rows × 168 columns
# Lift the column-display cap so wide frames are shown with every column.
pd.options.display.max_columns = None
df_train.head()
| number_of_elements | mean_atomic_mass | wtd_mean_atomic_mass | gmean_atomic_mass | wtd_gmean_atomic_mass | entropy_atomic_mass | wtd_entropy_atomic_mass | range_atomic_mass | wtd_range_atomic_mass | std_atomic_mass | wtd_std_atomic_mass | mean_fie | wtd_mean_fie | gmean_fie | wtd_gmean_fie | entropy_fie | wtd_entropy_fie | range_fie | wtd_range_fie | std_fie | wtd_std_fie | mean_atomic_radius | wtd_mean_atomic_radius | gmean_atomic_radius | wtd_gmean_atomic_radius | entropy_atomic_radius | wtd_entropy_atomic_radius | range_atomic_radius | wtd_range_atomic_radius | std_atomic_radius | wtd_std_atomic_radius | mean_Density | wtd_mean_Density | gmean_Density | wtd_gmean_Density | entropy_Density | wtd_entropy_Density | range_Density | wtd_range_Density | std_Density | wtd_std_Density | mean_ElectronAffinity | wtd_mean_ElectronAffinity | gmean_ElectronAffinity | wtd_gmean_ElectronAffinity | entropy_ElectronAffinity | wtd_entropy_ElectronAffinity | range_ElectronAffinity | wtd_range_ElectronAffinity | std_ElectronAffinity | wtd_std_ElectronAffinity | mean_FusionHeat | wtd_mean_FusionHeat | gmean_FusionHeat | wtd_gmean_FusionHeat | entropy_FusionHeat | wtd_entropy_FusionHeat | range_FusionHeat | wtd_range_FusionHeat | std_FusionHeat | wtd_std_FusionHeat | mean_ThermalConductivity | wtd_mean_ThermalConductivity | gmean_ThermalConductivity | wtd_gmean_ThermalConductivity | entropy_ThermalConductivity | wtd_entropy_ThermalConductivity | range_ThermalConductivity | wtd_range_ThermalConductivity | std_ThermalConductivity | wtd_std_ThermalConductivity | mean_Valence | wtd_mean_Valence | gmean_Valence | wtd_gmean_Valence | entropy_Valence | wtd_entropy_Valence | range_Valence | wtd_range_Valence | std_Valence | wtd_std_Valence | critical_temp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4 | 88.944468 | 57.862692 | 66.361592 | 36.116612 | 1.181795 | 1.062396 | 122.90607 | 31.794921 | 51.968828 | 53.622535 | 775.425 | 1010.268571 | 718.152900 | 938.016780 | 1.305967 | 0.791488 | 810.6 | 735.985714 | 323.811808 | 355.562967 | 160.25 | 105.514286 | 136.126003 | 84.528423 | 1.259244 | 1.207040 | 205 | 42.914286 | 75.237540 | 69.235569 | 4654.35725 | 2961.502286 | 724.953211 | 53.543811 | 1.033129 | 0.814598 | 8958.571 | 1579.583429 | 3306.162897 | 3572.596624 | 81.8375 | 111.727143 | 60.123179 | 99.414682 | 1.159687 | 0.787382 | 127.05 | 80.987143 | 51.433712 | 42.558396 | 6.9055 | 3.846857 | 3.479475 | 1.040986 | 1.088575 | 0.994998 | 12.878 | 1.744571 | 4.599064 | 4.666920 | 107.756645 | 61.015189 | 7.062488 | 0.621979 | 0.308148 | 0.262848 | 399.97342 | 57.127669 | 168.854244 | 138.517163 | 2.25 | 2.257143 | 2.213364 | 2.219783 | 1.368922 | 1.066221 | 1 | 1.085714 | 0.433013 | 0.437059 | 29.0 |
| 1 | 5 | 92.729214 | 58.518416 | 73.132787 | 36.396602 | 1.449309 | 1.057755 | 122.90607 | 36.161939 | 47.094633 | 53.979870 | 766.440 | 1010.612857 | 720.605511 | 938.745413 | 1.544145 | 0.807078 | 810.6 | 743.164286 | 290.183029 | 354.963511 | 161.20 | 104.971429 | 141.465215 | 84.370167 | 1.508328 | 1.204115 | 205 | 50.571429 | 67.321319 | 68.008817 | 5821.48580 | 3021.016571 | 1237.095080 | 54.095718 | 1.314442 | 0.914802 | 10488.571 | 1667.383429 | 3767.403176 | 3632.649185 | 90.8900 | 112.316429 | 69.833315 | 101.166398 | 1.427997 | 0.838666 | 127.05 | 81.207857 | 49.438167 | 41.667621 | 7.7844 | 3.796857 | 4.403790 | 1.035251 | 1.374977 | 1.073094 | 12.878 | 1.595714 | 4.473363 | 4.603000 | 172.205316 | 61.372331 | 16.064228 | 0.619735 | 0.847404 | 0.567706 | 429.97342 | 51.413383 | 198.554600 | 139.630922 | 2.00 | 2.257143 | 1.888175 | 2.210679 | 1.557113 | 1.047221 | 2 | 1.128571 | 0.632456 | 0.468606 | 26.0 |
| 2 | 4 | 88.944468 | 57.885242 | 66.361592 | 36.122509 | 1.181795 | 0.975980 | 122.90607 | 35.741099 | 51.968828 | 53.656268 | 775.425 | 1010.820000 | 718.152900 | 939.009036 | 1.305967 | 0.773620 | 810.6 | 743.164286 | 323.811808 | 354.804183 | 160.25 | 104.685714 | 136.126003 | 84.214573 | 1.259244 | 1.132547 | 205 | 49.314286 | 75.237540 | 67.797712 | 4654.35725 | 2999.159429 | 724.953211 | 53.974022 | 1.033129 | 0.760305 | 8958.571 | 1667.383429 | 3306.162897 | 3592.019281 | 81.8375 | 112.213571 | 60.123179 | 101.082152 | 1.159687 | 0.786007 | 127.05 | 81.207857 | 51.433712 | 41.639878 | 6.9055 | 3.822571 | 3.479475 | 1.037439 | 1.088575 | 0.927479 | 12.878 | 1.757143 | 4.599064 | 4.649635 | 107.756645 | 60.943760 | 7.062488 | 0.619095 | 0.308148 | 0.250477 | 399.97342 | 57.127669 | 168.854244 | 138.540613 | 2.25 | 2.271429 | 2.213364 | 2.232679 | 1.368922 | 1.029175 | 1 | 1.114286 | 0.433013 | 0.444697 | 19.0 |
| 3 | 4 | 88.944468 | 57.873967 | 66.361592 | 36.119560 | 1.181795 | 1.022291 | 122.90607 | 33.768010 | 51.968828 | 53.639405 | 775.425 | 1010.544286 | 718.152900 | 938.512777 | 1.305967 | 0.783207 | 810.6 | 739.575000 | 323.811808 | 355.183884 | 160.25 | 105.100000 | 136.126003 | 84.371352 | 1.259244 | 1.173033 | 205 | 46.114286 | 75.237540 | 68.521665 | 4654.35725 | 2980.330857 | 724.953211 | 53.758486 | 1.033129 | 0.788889 | 8958.571 | 1623.483429 | 3306.162897 | 3582.370597 | 81.8375 | 111.970357 | 60.123179 | 100.244950 | 1.159687 | 0.786900 | 127.05 | 81.097500 | 51.433712 | 42.102344 | 6.9055 | 3.834714 | 3.479475 | 1.039211 | 1.088575 | 0.964031 | 12.878 | 1.744571 | 4.599064 | 4.658301 | 107.756645 | 60.979474 | 7.062488 | 0.620535 | 0.308148 | 0.257045 | 399.97342 | 57.127669 | 168.854244 | 138.528893 | 2.25 | 2.264286 | 2.213364 | 2.226222 | 1.368922 | 1.048834 | 1 | 1.100000 | 0.433013 | 0.440952 | 22.0 |
| 4 | 4 | 88.944468 | 57.840143 | 66.361592 | 36.110716 | 1.181795 | 1.129224 | 122.90607 | 27.848743 | 51.968828 | 53.588771 | 775.425 | 1009.717143 | 718.152900 | 937.025573 | 1.305967 | 0.805230 | 810.6 | 728.807143 | 323.811808 | 356.319281 | 160.25 | 106.342857 | 136.126003 | 84.843442 | 1.259244 | 1.261194 | 205 | 36.514286 | 75.237540 | 70.634448 | 4654.35725 | 2923.845143 | 724.953211 | 53.117029 | 1.033129 | 0.859811 | 8958.571 | 1491.783429 | 3306.162897 | 3552.668664 | 81.8375 | 111.240714 | 60.123179 | 97.774719 | 1.159687 | 0.787396 | 127.05 | 80.766429 | 51.433712 | 43.452059 | 6.9055 | 3.871143 | 3.479475 | 1.044545 | 1.088575 | 1.044970 | 12.878 | 1.744571 | 4.599064 | 4.684014 | 107.756645 | 61.086617 | 7.062488 | 0.624878 | 0.308148 | 0.272820 | 399.97342 | 57.127669 | 168.854244 | 138.493671 | 2.25 | 2.242857 | 2.213364 | 2.206963 | 1.368922 | 1.096052 | 1 | 1.057143 | 0.433013 | 0.428809 | 23.0 |
# Preview the first rows of df_unique_m (one column per element symbol, plus
# `critical_temp` and `material`).
df_unique_m.head()
| H | He | Li | Be | B | C | N | O | F | Ne | Na | Mg | Al | Si | P | S | Cl | Ar | K | Ca | Sc | Ti | V | Cr | Mn | Fe | Co | Ni | Cu | Zn | Ga | Ge | As | Se | Br | Kr | Rb | Sr | Y | Zr | Nb | Mo | Tc | Ru | Rh | Pd | Ag | Cd | In | Sn | Sb | Te | I | Xe | Cs | Ba | La | Ce | Pr | Nd | Pm | Sm | Eu | Gd | Tb | Dy | Ho | Er | Tm | Yb | Lu | Hf | Ta | W | Re | Os | Ir | Pt | Au | Hg | Tl | Pb | Bi | Po | At | Rn | critical_temp | material | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.20 | 1.80 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 | 29.0 | Ba0.2La1.8Cu1O4 |
| 1 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.9 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.10 | 1.90 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 | 26.0 | Ba0.1La1.9Ag0.1Cu0.9O4 |
| 2 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.10 | 1.90 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 | 19.0 | Ba0.1La1.9Cu1O4 |
| 3 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.15 | 1.85 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 | 22.0 | Ba0.15La1.85Cu1O4 |
| 4 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.30 | 1.70 | 0.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0 | 23.0 | Ba0.3La1.7Cu1O4 |
# Capture the (rows, columns) shape of each frame, then report all three,
# one per line.
df_train_shape, df_unique_m_shape, df_merge_shape = (
    df_train.shape,
    df_unique_m.shape,
    df_merge.shape,
)
print(
    f'Train DataFrame Shape: {df_train_shape}',
    f'Unique_m DataFrame Shape:{df_unique_m_shape}',
    f'Merged DataFrame Shape:{df_merge_shape}',
    sep='\n',
)
Train DataFrame Shape: (21263, 82) Unique_m DataFrame Shape:(21263, 88) Merged DataFrame Shape:(21263, 168)
# Print the full per-column summary of df_train (column name, non-null count,
# dtype) plus memory usage.
df_train.info(verbose = True)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 21263 entries, 0 to 21262 Data columns (total 82 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 number_of_elements 21263 non-null int64 1 mean_atomic_mass 21263 non-null float64 2 wtd_mean_atomic_mass 21263 non-null float64 3 gmean_atomic_mass 21263 non-null float64 4 wtd_gmean_atomic_mass 21263 non-null float64 5 entropy_atomic_mass 21263 non-null float64 6 wtd_entropy_atomic_mass 21263 non-null float64 7 range_atomic_mass 21263 non-null float64 8 wtd_range_atomic_mass 21263 non-null float64 9 std_atomic_mass 21263 non-null float64 10 wtd_std_atomic_mass 21263 non-null float64 11 mean_fie 21263 non-null float64 12 wtd_mean_fie 21263 non-null float64 13 gmean_fie 21263 non-null float64 14 wtd_gmean_fie 21263 non-null float64 15 entropy_fie 21263 non-null float64 16 wtd_entropy_fie 21263 non-null float64 17 range_fie 21263 non-null float64 18 wtd_range_fie 21263 non-null float64 19 std_fie 21263 non-null float64 20 wtd_std_fie 21263 non-null float64 21 mean_atomic_radius 21263 non-null float64 22 wtd_mean_atomic_radius 21263 non-null float64 23 gmean_atomic_radius 21263 non-null float64 24 wtd_gmean_atomic_radius 21263 non-null float64 25 entropy_atomic_radius 21263 non-null float64 26 wtd_entropy_atomic_radius 21263 non-null float64 27 range_atomic_radius 21263 non-null int64 28 wtd_range_atomic_radius 21263 non-null float64 29 std_atomic_radius 21263 non-null float64 30 wtd_std_atomic_radius 21263 non-null float64 31 mean_Density 21263 non-null float64 32 wtd_mean_Density 21263 non-null float64 33 gmean_Density 21263 non-null float64 34 wtd_gmean_Density 21263 non-null float64 35 entropy_Density 21263 non-null float64 36 wtd_entropy_Density 21263 non-null float64 37 range_Density 21263 non-null float64 38 wtd_range_Density 21263 non-null float64 39 std_Density 21263 non-null float64 40 wtd_std_Density 21263 non-null float64 41 mean_ElectronAffinity 21263 non-null float64 42 
wtd_mean_ElectronAffinity 21263 non-null float64 43 gmean_ElectronAffinity 21263 non-null float64 44 wtd_gmean_ElectronAffinity 21263 non-null float64 45 entropy_ElectronAffinity 21263 non-null float64 46 wtd_entropy_ElectronAffinity 21263 non-null float64 47 range_ElectronAffinity 21263 non-null float64 48 wtd_range_ElectronAffinity 21263 non-null float64 49 std_ElectronAffinity 21263 non-null float64 50 wtd_std_ElectronAffinity 21263 non-null float64 51 mean_FusionHeat 21263 non-null float64 52 wtd_mean_FusionHeat 21263 non-null float64 53 gmean_FusionHeat 21263 non-null float64 54 wtd_gmean_FusionHeat 21263 non-null float64 55 entropy_FusionHeat 21263 non-null float64 56 wtd_entropy_FusionHeat 21263 non-null float64 57 range_FusionHeat 21263 non-null float64 58 wtd_range_FusionHeat 21263 non-null float64 59 std_FusionHeat 21263 non-null float64 60 wtd_std_FusionHeat 21263 non-null float64 61 mean_ThermalConductivity 21263 non-null float64 62 wtd_mean_ThermalConductivity 21263 non-null float64 63 gmean_ThermalConductivity 21263 non-null float64 64 wtd_gmean_ThermalConductivity 21263 non-null float64 65 entropy_ThermalConductivity 21263 non-null float64 66 wtd_entropy_ThermalConductivity 21263 non-null float64 67 range_ThermalConductivity 21263 non-null float64 68 wtd_range_ThermalConductivity 21263 non-null float64 69 std_ThermalConductivity 21263 non-null float64 70 wtd_std_ThermalConductivity 21263 non-null float64 71 mean_Valence 21263 non-null float64 72 wtd_mean_Valence 21263 non-null float64 73 gmean_Valence 21263 non-null float64 74 wtd_gmean_Valence 21263 non-null float64 75 entropy_Valence 21263 non-null float64 76 wtd_entropy_Valence 21263 non-null float64 77 range_Valence 21263 non-null int64 78 wtd_range_Valence 21263 non-null float64 79 std_Valence 21263 non-null float64 80 wtd_std_Valence 21263 non-null float64 81 critical_temp 21263 non-null float64 dtypes: float64(79), int64(3) memory usage: 13.3 MB
# Print the full per-column summary of df_unique_m (column name, non-null
# count, dtype) plus memory usage.
df_unique_m.info(verbose = True)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 21263 entries, 0 to 21262 Data columns (total 88 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 H 21263 non-null float64 1 He 21263 non-null int64 2 Li 21263 non-null float64 3 Be 21263 non-null float64 4 B 21263 non-null float64 5 C 21263 non-null float64 6 N 21263 non-null float64 7 O 21263 non-null float64 8 F 21263 non-null float64 9 Ne 21263 non-null int64 10 Na 21263 non-null float64 11 Mg 21263 non-null float64 12 Al 21263 non-null float64 13 Si 21263 non-null float64 14 P 21263 non-null float64 15 S 21263 non-null float64 16 Cl 21263 non-null float64 17 Ar 21263 non-null int64 18 K 21263 non-null float64 19 Ca 21263 non-null float64 20 Sc 21263 non-null float64 21 Ti 21263 non-null float64 22 V 21263 non-null float64 23 Cr 21263 non-null float64 24 Mn 21263 non-null float64 25 Fe 21263 non-null float64 26 Co 21263 non-null float64 27 Ni 21263 non-null float64 28 Cu 21263 non-null float64 29 Zn 21263 non-null float64 30 Ga 21263 non-null float64 31 Ge 21263 non-null float64 32 As 21263 non-null float64 33 Se 21263 non-null float64 34 Br 21263 non-null float64 35 Kr 21263 non-null int64 36 Rb 21263 non-null float64 37 Sr 21263 non-null float64 38 Y 21263 non-null float64 39 Zr 21263 non-null float64 40 Nb 21263 non-null float64 41 Mo 21263 non-null float64 42 Tc 21263 non-null float64 43 Ru 21263 non-null float64 44 Rh 21263 non-null float64 45 Pd 21263 non-null float64 46 Ag 21263 non-null float64 47 Cd 21263 non-null float64 48 In 21263 non-null float64 49 Sn 21263 non-null float64 50 Sb 21263 non-null float64 51 Te 21263 non-null float64 52 I 21263 non-null float64 53 Xe 21263 non-null int64 54 Cs 21263 non-null float64 55 Ba 21263 non-null float64 56 La 21263 non-null float64 57 Ce 21263 non-null float64 58 Pr 21263 non-null float64 59 Nd 21263 non-null float64 60 Pm 21263 non-null int64 61 Sm 21263 non-null float64 62 Eu 21263 non-null float64 63 Gd 21263 non-null float64 64 Tb 
21263 non-null float64 65 Dy 21263 non-null float64 66 Ho 21263 non-null float64 67 Er 21263 non-null float64 68 Tm 21263 non-null float64 69 Yb 21263 non-null float64 70 Lu 21263 non-null float64 71 Hf 21263 non-null float64 72 Ta 21263 non-null float64 73 W 21263 non-null float64 74 Re 21263 non-null float64 75 Os 21263 non-null float64 76 Ir 21263 non-null float64 77 Pt 21263 non-null float64 78 Au 21263 non-null float64 79 Hg 21263 non-null float64 80 Tl 21263 non-null float64 81 Pb 21263 non-null float64 82 Bi 21263 non-null float64 83 Po 21263 non-null int64 84 At 21263 non-null int64 85 Rn 21263 non-null int64 86 critical_temp 21263 non-null float64 87 material 21263 non-null object dtypes: float64(78), int64(9), object(1) memory usage: 14.3+ MB
# Print the full per-column listing of the merged frame to confirm the
# combined column set and dtypes after the drop/rename.
df_merge.info(verbose = True)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 21263 entries, 0 to 21262 Data columns (total 168 columns): # Column Dtype --- ------ ----- 0 number_of_elements int64 1 mean_atomic_mass float64 2 wtd_mean_atomic_mass float64 3 gmean_atomic_mass float64 4 wtd_gmean_atomic_mass float64 5 entropy_atomic_mass float64 6 wtd_entropy_atomic_mass float64 7 range_atomic_mass float64 8 wtd_range_atomic_mass float64 9 std_atomic_mass float64 10 wtd_std_atomic_mass float64 11 mean_fie float64 12 wtd_mean_fie float64 13 gmean_fie float64 14 wtd_gmean_fie float64 15 entropy_fie float64 16 wtd_entropy_fie float64 17 range_fie float64 18 wtd_range_fie float64 19 std_fie float64 20 wtd_std_fie float64 21 mean_atomic_radius float64 22 wtd_mean_atomic_radius float64 23 gmean_atomic_radius float64 24 wtd_gmean_atomic_radius float64 25 entropy_atomic_radius float64 26 wtd_entropy_atomic_radius float64 27 range_atomic_radius int64 28 wtd_range_atomic_radius float64 29 std_atomic_radius float64 30 wtd_std_atomic_radius float64 31 mean_Density float64 32 wtd_mean_Density float64 33 gmean_Density float64 34 wtd_gmean_Density float64 35 entropy_Density float64 36 wtd_entropy_Density float64 37 range_Density float64 38 wtd_range_Density float64 39 std_Density float64 40 wtd_std_Density float64 41 mean_ElectronAffinity float64 42 wtd_mean_ElectronAffinity float64 43 gmean_ElectronAffinity float64 44 wtd_gmean_ElectronAffinity float64 45 entropy_ElectronAffinity float64 46 wtd_entropy_ElectronAffinity float64 47 range_ElectronAffinity float64 48 wtd_range_ElectronAffinity float64 49 std_ElectronAffinity float64 50 wtd_std_ElectronAffinity float64 51 mean_FusionHeat float64 52 wtd_mean_FusionHeat float64 53 gmean_FusionHeat float64 54 wtd_gmean_FusionHeat float64 55 entropy_FusionHeat float64 56 wtd_entropy_FusionHeat float64 57 range_FusionHeat float64 58 wtd_range_FusionHeat float64 59 std_FusionHeat float64 60 wtd_std_FusionHeat float64 61 mean_ThermalConductivity float64 62 
wtd_mean_ThermalConductivity float64 63 gmean_ThermalConductivity float64 64 wtd_gmean_ThermalConductivity float64 65 entropy_ThermalConductivity float64 66 wtd_entropy_ThermalConductivity float64 67 range_ThermalConductivity float64 68 wtd_range_ThermalConductivity float64 69 std_ThermalConductivity float64 70 wtd_std_ThermalConductivity float64 71 mean_Valence float64 72 wtd_mean_Valence float64 73 gmean_Valence float64 74 wtd_gmean_Valence float64 75 entropy_Valence float64 76 wtd_entropy_Valence float64 77 range_Valence int64 78 wtd_range_Valence float64 79 std_Valence float64 80 wtd_std_Valence float64 81 critical_temp float64 82 H float64 83 He int64 84 Li float64 85 Be float64 86 B float64 87 C float64 88 N float64 89 O float64 90 F float64 91 Ne int64 92 Na float64 93 Mg float64 94 Al float64 95 Si float64 96 P float64 97 S float64 98 Cl float64 99 Ar int64 100 K float64 101 Ca float64 102 Sc float64 103 Ti float64 104 V float64 105 Cr float64 106 Mn float64 107 Fe float64 108 Co float64 109 Ni float64 110 Cu float64 111 Zn float64 112 Ga float64 113 Ge float64 114 As float64 115 Se float64 116 Br float64 117 Kr int64 118 Rb float64 119 Sr float64 120 Y float64 121 Zr float64 122 Nb float64 123 Mo float64 124 Tc float64 125 Ru float64 126 Rh float64 127 Pd float64 128 Ag float64 129 Cd float64 130 In float64 131 Sn float64 132 Sb float64 133 Te float64 134 I float64 135 Xe int64 136 Cs float64 137 Ba float64 138 La float64 139 Ce float64 140 Pr float64 141 Nd float64 142 Pm int64 143 Sm float64 144 Eu float64 145 Gd float64 146 Tb float64 147 Dy float64 148 Ho float64 149 Er float64 150 Tm float64 151 Yb float64 152 Lu float64 153 Hf float64 154 Ta float64 155 W float64 156 Re float64 157 Os float64 158 Ir float64 159 Pt float64 160 Au float64 161 Hg float64 162 Tl float64 163 Pb float64 164 Bi float64 165 Po int64 166 At int64 167 Rn int64 dtypes: float64(156), int64(12) memory usage: 27.3 MB
# Summary statistics (count, mean, std, min, quartiles, ...) for each column
# of the merged frame.
df_merge.describe()
| number_of_elements | mean_atomic_mass | wtd_mean_atomic_mass | gmean_atomic_mass | wtd_gmean_atomic_mass | entropy_atomic_mass | wtd_entropy_atomic_mass | range_atomic_mass | wtd_range_atomic_mass | std_atomic_mass | wtd_std_atomic_mass | mean_fie | wtd_mean_fie | gmean_fie | wtd_gmean_fie | entropy_fie | wtd_entropy_fie | range_fie | wtd_range_fie | std_fie | wtd_std_fie | mean_atomic_radius | wtd_mean_atomic_radius | gmean_atomic_radius | wtd_gmean_atomic_radius | entropy_atomic_radius | wtd_entropy_atomic_radius | range_atomic_radius | wtd_range_atomic_radius | std_atomic_radius | wtd_std_atomic_radius | mean_Density | wtd_mean_Density | gmean_Density | wtd_gmean_Density | entropy_Density | wtd_entropy_Density | range_Density | wtd_range_Density | std_Density | wtd_std_Density | mean_ElectronAffinity | wtd_mean_ElectronAffinity | gmean_ElectronAffinity | wtd_gmean_ElectronAffinity | entropy_ElectronAffinity | wtd_entropy_ElectronAffinity | range_ElectronAffinity | wtd_range_ElectronAffinity | std_ElectronAffinity | wtd_std_ElectronAffinity | mean_FusionHeat | wtd_mean_FusionHeat | gmean_FusionHeat | wtd_gmean_FusionHeat | entropy_FusionHeat | wtd_entropy_FusionHeat | range_FusionHeat | wtd_range_FusionHeat | std_FusionHeat | wtd_std_FusionHeat | mean_ThermalConductivity | wtd_mean_ThermalConductivity | gmean_ThermalConductivity | wtd_gmean_ThermalConductivity | entropy_ThermalConductivity | wtd_entropy_ThermalConductivity | range_ThermalConductivity | wtd_range_ThermalConductivity | std_ThermalConductivity | wtd_std_ThermalConductivity | mean_Valence | wtd_mean_Valence | gmean_Valence | wtd_gmean_Valence | entropy_Valence | wtd_entropy_Valence | range_Valence | wtd_range_Valence | std_Valence | wtd_std_Valence | critical_temp | H | He | Li | Be | B | C | N | O | F | Ne | Na | Mg | Al | Si | P | S | Cl | Ar | K | Ca | Sc | Ti | V | Cr | Mn | Fe | Co | Ni | Cu | Zn | Ga | Ge | As | Se | Br | Kr | Rb | Sr | Y | Zr | Nb | Mo | Tc | Ru | Rh | Pd | Ag | Cd | In | Sn 
| Sb | Te | I | Xe | Cs | Ba | La | Ce | Pr | Nd | Pm | Sm | Eu | Gd | Tb | Dy | Ho | Er | Tm | Yb | Lu | Hf | Ta | W | Re | Os | Ir | Pt | Au | Hg | Tl | Pb | Bi | Po | At | Rn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.0 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.0 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.0 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.0 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 
21263.000000 | 21263.0 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.0 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.000000 | 21263.0 | 21263.0 | 21263.0 |
| mean | 4.115224 | 87.557631 | 72.988310 | 71.290627 | 58.539916 | 1.165608 | 1.063884 | 115.601251 | 33.225218 | 44.391893 | 41.448045 | 769.614748 | 870.442317 | 737.474751 | 832.769649 | 1.299172 | 0.926726 | 572.222612 | 483.517264 | 215.631279 | 224.050033 | 157.983101 | 134.720039 | 144.448738 | 120.989330 | 1.267756 | 1.131138 | 139.325025 | 51.369885 | 51.601267 | 52.340465 | 6111.465214 | 5267.188547 | 3460.692235 | 3117.241110 | 1.072425 | 0.856037 | 8665.438818 | 2902.736814 | 3416.910784 | 3319.170628 | 76.879751 | 92.717486 | 54.359502 | 72.416225 | 1.070250 | 0.770757 | 120.730514 | 59.332637 | 48.912207 | 44.409356 | 14.296113 | 13.848001 | 10.136977 | 10.141161 | 1.093343 | 0.914065 | 21.138994 | 8.218528 | 8.323333 | 7.717576 | 89.706911 | 81.549080 | 29.841727 | 27.308061 | 0.727630 | 0.539991 | 250.893443 | 62.033066 | 98.943993 | 96.234051 | 3.198228 | 3.153127 | 3.056536 | 3.055885 | 1.295682 | 1.052841 | 2.041010 | 1.483007 | 0.839342 | 0.673987 | 34.421219 | 0.017685 | 0.0 | 0.012125 | 0.034638 | 0.142594 | 0.384968 | 0.013284 | 3.009129 | 0.014874 | 0.0 | 0.008892 | 0.026772 | 0.061678 | 0.189889 | 0.028143 | 0.106246 | 0.009050 | 0.0 | 0.016042 | 0.258347 | 0.010919 | 0.156817 | 0.224782 | 0.006119 | 0.003191 | 0.153182 | 0.035323 | 0.090182 | 1.276751 | 0.014034 | 0.073997 | 0.082556 | 0.155197 | 0.078662 | 0.003940 | 0.0 | 0.007799 | 0.326909 | 0.177556 | 0.370901 | 0.442349 | 0.146367 | 0.002291 | 0.055325 | 0.068072 | 0.085034 | 0.007834 | 0.009152 | 0.049468 | 0.120994 | 0.101269 | 0.040491 | 0.004744 | 0.0 | 0.004129 | 0.568440 | 0.264953 | 0.030662 | 0.041494 | 0.039666 | 0.0 | 0.021992 | 0.017821 | 0.023959 | 0.002857 | 0.009536 | 0.008832 | 0.014217 | 0.008909 | 0.012716 | 0.026849 | 0.009168 | 0.036086 | 0.010424 | 0.038206 | 0.022512 | 0.061558 | 0.034108 | 0.020535 | 0.036663 | 0.047954 | 0.042461 | 0.201009 | 0.0 | 0.0 | 0.0 |
| std | 1.439295 | 29.676497 | 33.490406 | 31.030272 | 36.651067 | 0.364930 | 0.401423 | 54.626887 | 26.967752 | 20.035430 | 19.983544 | 87.488694 | 143.278200 | 78.327275 | 119.772520 | 0.381935 | 0.334018 | 309.614442 | 224.042874 | 109.966774 | 127.927104 | 20.147288 | 28.801567 | 22.090958 | 35.837843 | 0.375411 | 0.407159 | 67.272228 | 35.019356 | 22.898396 | 25.294524 | 2846.785185 | 3221.314506 | 3703.256370 | 3975.122587 | 0.342356 | 0.319761 | 4097.126831 | 2398.471020 | 1673.624915 | 1611.799629 | 27.701890 | 32.276387 | 29.007425 | 31.648444 | 0.343391 | 0.285986 | 58.700327 | 28.620409 | 21.740521 | 20.429293 | 11.300188 | 14.279335 | 10.065901 | 13.134007 | 0.375932 | 0.370125 | 20.370620 | 11.414066 | 8.671651 | 7.288239 | 38.517485 | 45.519256 | 34.059581 | 40.191150 | 0.325976 | 0.318248 | 158.703557 | 43.123317 | 60.143272 | 63.710355 | 1.044611 | 1.191249 | 1.046257 | 1.174815 | 0.393155 | 0.380291 | 1.242345 | 0.978176 | 0.484676 | 0.455580 | 34.254362 | 0.267220 | 0.0 | 0.129552 | 0.848541 | 1.044486 | 4.408032 | 0.150427 | 3.811649 | 0.132119 | 0.0 | 0.101685 | 0.271606 | 1.126254 | 2.217277 | 0.466710 | 0.760821 | 0.119717 | 0.0 | 0.138187 | 0.902732 | 0.185651 | 2.728139 | 3.407763 | 0.254272 | 0.129449 | 0.713075 | 0.580672 | 0.982521 | 2.079375 | 0.403316 | 1.115005 | 1.021279 | 1.076049 | 0.676294 | 0.083907 | 0.0 | 0.121254 | 0.763625 | 0.429953 | 4.846459 | 4.848246 | 2.084302 | 0.064728 | 0.770327 | 1.005898 | 1.554018 | 0.167831 | 0.688729 | 0.521820 | 1.886951 | 1.839020 | 0.718043 | 0.088480 | 0.0 | 0.077676 | 0.983288 | 2.320822 | 0.173147 | 1.282059 | 0.224657 | 0.0 | 0.183173 | 0.151433 | 0.155860 | 0.064737 | 0.104153 | 0.098728 | 0.131417 | 0.130455 | 0.214806 | 0.276861 | 0.208969 | 0.851380 | 0.164628 | 1.177476 | 0.282265 | 0.864859 | 0.307888 | 0.717975 | 0.205846 | 0.272298 | 0.274365 | 0.655927 | 0.0 | 0.0 | 0.0 |
| min | 1.000000 | 6.941000 | 6.423452 | 5.320573 | 1.960849 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 375.500000 | 375.500000 | 375.500000 | 375.500000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 48.000000 | 48.000000 | 48.000000 | 48.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.429000 | 1.429000 | 1.429000 | 0.686245 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.500000 | 1.500000 | 1.500000 | 1.500000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.222000 | 0.222000 | 0.222000 | 0.222000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.026580 | 0.026580 | 0.026580 | 0.022952 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000210 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 |
| 25% | 3.000000 | 72.458076 | 52.143839 | 58.041225 | 35.248990 | 0.966676 | 0.775363 | 78.512902 | 16.824174 | 32.890369 | 28.539377 | 723.740000 | 738.946339 | 692.541331 | 720.108284 | 1.085871 | 0.753757 | 262.400000 | 291.088889 | 114.135763 | 92.994286 | 149.333333 | 112.127359 | 133.542493 | 89.210097 | 1.066389 | 0.852181 | 80.000000 | 28.598137 | 35.112518 | 32.016958 | 4513.500000 | 2999.158291 | 883.117278 | 66.746836 | 0.913959 | 0.688693 | 6648.000000 | 1656.847429 | 2819.497063 | 2564.342926 | 62.090000 | 73.350000 | 33.700512 | 50.772124 | 0.890589 | 0.660662 | 86.700000 | 34.036000 | 38.372410 | 33.440123 | 7.588667 | 5.033407 | 4.109978 | 1.322127 | 0.833333 | 0.672732 | 12.878000 | 2.329309 | 4.261340 | 4.603491 | 61.000000 | 54.180953 | 8.339818 | 1.087284 | 0.457810 | 0.250677 | 86.382000 | 29.349419 | 37.933172 | 31.985437 | 2.333333 | 2.116732 | 2.279705 | 2.091251 | 1.060857 | 0.775678 | 1.000000 | 0.921454 | 0.451754 | 0.306892 | 5.365000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 |
| 50% | 4.000000 | 84.922750 | 60.696571 | 66.361592 | 39.918385 | 1.199541 | 1.146783 | 122.906070 | 26.636008 | 45.123500 | 44.285984 | 764.900000 | 889.966667 | 727.960610 | 856.202765 | 1.356236 | 0.916843 | 764.100000 | 510.440000 | 266.373871 | 258.449503 | 160.250000 | 125.970297 | 142.807563 | 113.181369 | 1.330735 | 1.242878 | 171.000000 | 43.000000 | 58.663106 | 59.932929 | 5329.085800 | 4303.421500 | 1339.974702 | 1515.364631 | 1.090610 | 0.882747 | 8958.571000 | 2082.956581 | 3301.890502 | 3625.631828 | 73.100000 | 102.856863 | 51.470113 | 73.173958 | 1.138284 | 0.781205 | 127.050000 | 71.156250 | 51.125720 | 48.029866 | 9.304400 | 8.330667 | 5.253498 | 4.929787 | 1.112098 | 0.994998 | 12.878000 | 3.436400 | 4.948155 | 5.500675 | 96.504430 | 73.333333 | 14.287643 | 6.096120 | 0.738694 | 0.545783 | 399.795000 | 56.556240 | 135.762089 | 113.556983 | 2.833333 | 2.618182 | 2.615321 | 2.434057 | 1.368922 | 1.166532 | 2.000000 | 1.063077 | 0.800000 | 0.500000 | 20.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.900000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 |
| 75% | 5.000000 | 100.404410 | 86.103540 | 78.116681 | 73.113234 | 1.444537 | 1.359418 | 154.119320 | 38.356908 | 59.322812 | 53.629284 | 796.320000 | 1004.117384 | 765.715174 | 937.575826 | 1.551120 | 1.061750 | 810.600000 | 690.703310 | 297.724924 | 342.656991 | 169.857143 | 158.265231 | 155.938199 | 150.988640 | 1.512348 | 1.425684 | 205.000000 | 60.224491 | 69.424491 | 73.777278 | 6728.000000 | 6416.333333 | 5794.965188 | 5766.015191 | 1.323930 | 1.080939 | 9778.571000 | 3409.026316 | 4004.273231 | 3959.191394 | 85.504167 | 110.738462 | 67.505900 | 89.975670 | 1.345894 | 0.877541 | 138.630000 | 76.706965 | 56.221787 | 53.320838 | 17.114444 | 18.514286 | 13.600037 | 16.428652 | 1.378110 | 1.157379 | 23.200000 | 10.498780 | 9.041230 | 8.017581 | 111.005316 | 99.062911 | 42.371302 | 47.308041 | 0.962218 | 0.777353 | 399.973420 | 91.869245 | 153.806272 | 162.711144 | 4.000000 | 4.026201 | 3.727919 | 3.914868 | 1.589027 | 1.330801 | 3.000000 | 1.918400 | 1.200000 | 1.020436 | 63.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 6.800000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.815000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 1.350000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 |
| max | 9.000000 | 208.980400 | 208.980400 | 208.980400 | 208.980400 | 1.983797 | 1.958203 | 207.972460 | 205.589910 | 101.019700 | 101.019700 | 1313.100000 | 1348.028986 | 1313.100000 | 1327.593381 | 2.157777 | 2.038560 | 1304.500000 | 1251.855072 | 499.671949 | 479.162305 | 298.000000 | 298.000000 | 298.000000 | 298.000000 | 2.141961 | 1.903748 | 256.000000 | 240.164344 | 115.500000 | 97.140711 | 22590.000000 | 22590.000000 | 22590.000000 | 22590.000000 | 1.954297 | 1.703420 | 22588.571000 | 22434.160000 | 10724.374500 | 10410.932005 | 326.100000 | 326.100000 | 326.100000 | 326.100000 | 1.767732 | 1.675400 | 349.000000 | 218.696600 | 162.895331 | 169.075862 | 105.000000 | 105.000000 | 105.000000 | 105.000000 | 2.034410 | 1.747165 | 104.778000 | 102.675000 | 51.635000 | 51.680482 | 332.500000 | 406.960000 | 317.883627 | 376.032878 | 1.633977 | 1.612989 | 429.974170 | 401.440000 | 214.986150 | 213.300452 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 2.141963 | 1.949739 | 6.000000 | 6.992200 | 3.000000 | 3.000000 | 185.000000 | 14.000000 | 0.0 | 3.000000 | 40.000000 | 105.000000 | 120.000000 | 12.800000 | 66.000000 | 4.000000 | 0.0 | 4.000000 | 12.000000 | 99.925000 | 100.000000 | 20.000000 | 15.000000 | 3.000000 | 0.0 | 3.300000 | 24.000000 | 5.000000 | 75.000000 | 79.500000 | 34.900000 | 14.000000 | 30.000000 | 35.380000 | 45.000000 | 98.000000 | 20.000000 | 41.000000 | 46.000000 | 18.000000 | 19.000000 | 5.000000 | 0.0 | 4.000000 | 16.700000 | 9.000000 | 96.710000 | 99.976000 | 99.992000 | 6.000000 | 64.000000 | 45.000000 | 50.997450 | 7.000000 | 99.995000 | 31.500000 | 99.200000 | 83.500000 | 66.700000 | 4.000000 | 0.0 | 3.000000 | 24.000000 | 98.000000 | 4.998000 | 185.000000 | 6.000000 | 0.0 | 12.000000 | 6.000000 | 4.000000 | 5.000000 | 5.000000 | 5.000000 | 5.000000 | 5.000000 | 16.000000 | 7.000000 | 25.000000 | 55.000000 | 14.000000 | 97.240000 | 10.000000 | 45.000000 | 5.800000 | 64.000000 | 8.000000 | 7.000000 | 19.000000 | 14.000000 | 0.0 | 0.0 | 0.0 |
# Interactive histogram of the regression target (critical temperature),
# drawn through the cufflinks `iplot` accessor on the pandas Series.
hist_options = {
    'kind': 'hist',
    'bins': 100,
    'linecolor': 'black',
    'xTitle': 'Critical Temperature',
    'yTitle': 'count',
    'title': 'Histogram of Critical Temperature',
}
df_merge['critical_temp'].iplot(**hist_options)
# Pairwise scatter plots for a handful of columns, including the target.
scatter_cols = [
    'number_of_elements',
    'mean_atomic_mass',
    'mean_atomic_radius',
    'critical_temp',
]
df_scatter = df_merge.loc[:, scatter_cols]
df_scatter.scatter_matrix()
# Standardise every column except the target.
# NOTE(review): the scaler is fit on all rows, before the cross-validation
# splits are made, so each held-out fold contributes to the scaling
# statistics. Wrapping the scaler and model in a Pipeline would avoid this.
scale_cols = df_merge.columns.drop('critical_temp')
sc = StandardScaler()
scaled_values = sc.fit(df_merge[scale_cols]).transform(df_merge[scale_cols])
df_merge[scale_cols] = scaled_values
# Cross-validation splitter: plain (non-stratified) 10-fold KFold, shuffled
# with a fixed seed so the folds are reproducible.
kfcv = KFold(n_splits=10, shuffle=True, random_state=0)
# Target column and the remaining feature columns.
target_col = ['critical_temp']
is_feature = ~df_merge.columns.isin(target_col)
feature_cols = df_merge.columns[is_feature]
# Design matrix and response vector used by the models below.
X = df_merge[feature_cols]
y = df_merge['critical_temp']
%%time
# Grid search for Linear Regression task 1
# Tunes ElasticNetCV's l1_ratio / tol / eps with the shared KFold splitter,
# scoring each candidate by negative RMSE (higher is better).
lr_grid = {
    # l1_ratio=0 is left out: ElasticNetCV cannot generate its automatic
    # alpha grid for a pure L2 penalty, so every such fit fails and is scored
    # NaN (silently here, because warnings are globally ignored).
    "l1_ratio": np.round(np.linspace(0.1, 0.9, 9), 1),
    "tol": [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
    # eps = alpha_min / alpha_max. Values >= 1 place the whole alpha path at
    # or above alpha_max, where all coefficients are zero, so only eps < 1 is
    # worth searching.
    "eps": [1e-3, 1e-2, 1e-1],
}
model = ElasticNetCV(random_state=0, max_iter=10000)
model_gs = GridSearchCV(
    model,
    lr_grid,
    cv=kfcv,
    n_jobs=-1,
    scoring="neg_root_mean_squared_error",
)
model_gs.fit(X, y)
best_params = model_gs.best_params_
print(f'Grid Search Best Parameters{best_params}')
Grid Search Best Parameters{'eps': 0.001, 'l1_ratio': 0.5, 'tol': 0.1}
CPU times: user 11 s, sys: 5.65 s, total: 16.7 s
Wall time: 9min 58s
# Re-evaluate the elastic net with the hyper-parameters reported by the grid
# search, keeping the fitted estimator of every fold for later inspection.
model_ = ElasticNetCV(
    l1_ratio=.5,
    tol=0.1,
    eps=0.001,
    random_state=0,
    max_iter=10000,
)
model_score = cross_validate(
    model_, X, y,
    scoring='neg_root_mean_squared_error',
    cv=kfcv,
    return_estimator=True,
    n_jobs=-1,
    return_train_score=True,
)
# One row per fold; the scores are negative RMSE, so values closer to zero
# are better.
model_results = pd.DataFrame(model_score)
# Average only the numeric columns: the 'estimator' column holds fitted model
# objects, and averaging it raises TypeError on pandas >= 2.0. The Series is
# aligned on column names, so 'estimator' is NaN in the summary row.
model_results.loc['mean'] = model_results.mean(numeric_only=True)
print(model_results.to_markdown())
| | fit_time | score_time | estimator | test_score | train_score | |:-----|-----------:|-------------:|:------------------------------------------------------|-------------:|--------------:| | 0 | 0.790638 | 0.00162292 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -17.8198 | -17.6992 | | 1 | 0.790282 | 0.00154591 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -19.5118 | -17.4999 | | 2 | 0.767968 | 0.00222802 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -17.8285 | -17.6565 | | 3 | 0.786807 | 0.00218701 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -17.6732 | -17.6302 | | 4 | 0.754597 | 0.00209999 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -17.8176 | -17.703 | | 5 | 0.784165 | 0.0020647 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -17.1766 | -17.7352 | | 6 | 0.762796 | 0.0016861 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -17.8488 | -17.7298 | | 7 | 0.778274 | 0.0014472 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -18.0257 | -17.6453 | | 8 | 0.762333 | 0.00171614 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -17.7077 | -17.629 | | 9 | 0.74049 | 0.00208688 | ElasticNetCV(max_iter=10000, random_state=0, tol=0.1) | -17.8841 | -17.6424 | | mean | 0.771835 | 0.00186849 | nan | -17.9294 | -17.6571 |
# Obtain the average absolute coefficient of each feature across the fitted
# fold models. Iterating over the estimators themselves (instead of a
# hard-coded range(10)) keeps this in step with the number of CV splits, and
# concatenating once avoids growing a frame from an empty DataFrame in a loop.
coef_frames = [
    pd.DataFrame({'Coefficient': np.abs(fold_model.coef_), 'Feature': X.columns})
    for fold_model in model_score['estimator']
]
df = pd.concat(coef_frames, axis=0)
avg_feat_coef = df.groupby('Feature', as_index=False)['Coefficient'].mean()
# Rank features by mean absolute coefficient and keep the ten largest.
avg_feat_coef = avg_feat_coef.sort_values(by='Coefficient', ascending=False)
top_10_features = avg_feat_coef.head(10)
# Horizontal bar chart of the ten features with the largest mean absolute
# coefficient (the values plotted are the averaged absolute coefficients).
plt.style.use('ggplot')
feature_names = top_10_features['Feature']
mean_abs_coefs = top_10_features['Coefficient']
plt.barh(y=feature_names, width=mean_abs_coefs)
plt.xlabel('Coefficient Value')
plt.ylabel('Feature')
plt.title('Top 10 Features in Elastic Net CV Model')
# Tilt the feature names so long labels stay readable.
plt.yticks(rotation=30, va='top')
plt.show()